import geopandas as gpd


# Name of the land use variable in each year's data, keyed by year.
# Every listed year uses 'Land_Use' except 2015, which uses 'F2015Land_'.
# Note that 2010 has no entry.
col_name = {
    survey_year: 'F2015Land_' if survey_year == 2015 else 'Land_Use'
    for survey_year in (2009, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018)
}

# Correspondence used to recode PVWater categories so that labels are
# consistent across years, written as {label to replace: label to use}.
# Target labels shared by several entries are named once so that their
# spelling cannot drift between entries.
_BERRIES = 'Raspberries and Blackberries'
_NURSERIES = 'Nurseries/Flowers/Tropical Plants'
recode_dict = {
    'Raspberries or Blackberries': _BERRIES,
    'Nurseries/Flower/Subtropical Plants': _NURSERIES,
    'Nursuries/Flowers/Tropical Plants': _NURSERIES,
    'Vine/Grapes': 'Vines/Grapes',
    'Residnetial': 'Residential',
    'Blackberries': _BERRIES,
    'Raspberries': _BERRIES,
    'Artichoke': 'Artichokes',
    'Crop Cover': 'Cover Crop',
    'Greenhouse': 'Greenhouses',
    'Indsutrial': 'Industrial',
    'Deciduous (Apple Orhcards)': 'Deciduous (Apple Orchards)',
}


def load(year, file_name):
    """Loads one year of data and harmonizes its land use column.

    Keeps the land use column defined for ``year`` in ``col_name``
    together with the ``'geometry'`` column, renames the land use column
    to ``'pvwater'``, recodes its categories with ``recode_dict``, and
    drops rows whose geometry is empty or missing.

    Args:
        year (int): The year of interest. Must be a key of ``col_name``.
        file_name (str): The path to input file.

    Returns:
        df (geopandas.GeoDataFrame): The loaded dataset, with the columns
            ``'pvwater'`` and ``'geometry'``.

    Raises:
        KeyError: If ``year`` has no entry in ``col_name``.
        ValueError: If the file does not contain the land use column
            expected for ``year`` or the ``'geometry'`` column.
    """
    # fail before reading the file if the year is not supported
    if year not in col_name:
        raise KeyError(
            f'no land use column is defined for year {year!r}; '
            f'known years: {sorted(col_name)}')
    wanted = [col_name[year], 'geometry']

    raw = gpd.read_file(file_name)
    # DataFrame.filter silently skips labels that are absent, which would
    # only surface below as an opaque length mismatch when renaming, so
    # report the missing column(s) by name instead
    missing = [col for col in wanted if col not in raw.columns]
    if missing:
        raise ValueError(
            f'{file_name!r} has no column(s) {missing} '
            f'(available: {list(raw.columns)})')

    df = raw.filter(items=wanted)
    # rename for consistency (filter returns the columns in `wanted` order)
    df.columns = ['pvwater', 'geometry']
    # recode pv water categories
    df.loc[:, 'pvwater'] = df['pvwater'].replace(recode_dict)
    # drop empty and missing
    df = df.loc[~(df['geometry'].is_empty | df['geometry'].isna()), :]
    return df
